Analysis date: 2021-03-30
library(plyr)
library(gtools)
library(pheatmap)
library(Matrix)
library(Hmisc)
library(ggpubr)
library(readxl)
library(DESeq2)
library(tidyverse)
library(limma)
library(apeglm)
library(biomartr)
library(biomaRt)
library(MultiAssayExperiment)
library(gplots)
library(grid)
library(cowplot)
load("data/multiomics_MAE.RData")
source("data/Figure_layouts.R")
KEGG <- read.table("data/c2.cp.kegg.v7.0.symbols.gmt", sep = "/")
KEGG <- KEGG %>% as_tibble() %>% mutate(pathway = gsub("\thttp:", "", V1),
genes=V7)
BCAA_genes <- KEGG %>% filter(pathway == "KEGG_VALINE_LEUCINE_AND_ISOLEUCINE_DEGRADATION") %>% dplyr::select(genes) %>% unlist() %>% strsplit2("\t") %>% .[,-1]
proteasome_genes <- KEGG %>% filter(pathway == "KEGG_PROTEASOME") %>% dplyr::select(genes) %>% unlist() %>% strsplit2("\t") %>% .[,-1]
splice_genes <- KEGG %>% filter(pathway == "KEGG_SPLICEOSOME") %>% dplyr::select(genes) %>% unlist() %>% strsplit2("\t") %>% .[,-1]
BCR_genes <- KEGG %>% filter(pathway == "KEGG_B_CELL_RECEPTOR_SIGNALING_PATHWAY") %>% dplyr::select(genes) %>% unlist() %>% strsplit2("\t") %>% .[,-1]
change_chr_abber_brackets <- function(levels_alt){
levels_alt %>%
str_replace(., "del", "del(") %>% str_replace(., "gain", "gain(") %>%
str_replace(., "q", ")(q") %>% str_replace(., "p", ")(p") %>%
str_replace(., "\\)\\(prim", "prim") %>%
str_replace(., "11\\)\\(q", "11)(q22.3)") %>%
str_replace(., "p13", "p13)") %>% str_replace(., "q14", "q14)") %>%
str_replace(., "q24", "q24)")
}
prot_few_nas <- multiomics_MAE[["proteomics"]] %>% is.na() %>% rowSums()
prot_few_nas <- prot_few_nas[ prot_few_nas == 0 ] %>% names()
#ensembl=useMart("ensembl")
#ensembl = useDataset("hsapiens_gene_ensembl",mart=ensembl)
#genemap <- getBM(attributes = c("ensembl_gene_id", "hgnc_symbol", "start_position", "end_position" , "chromosome_name", "description"),
# filters = "hgnc_symbol", values = (prot_few_nas %>% unique), mart = ensembl)
#genemap <- genemap %>% as_tibble() %>% mutate(mean_position=(start_position + end_position)/2)
load("/Volumes/sd17b003/Sophie/Analysis/CLL_Proteomics/R_objects/ensembl_proteins_location.RData")
metadata(multiomics_MAE)$gene_symbol_mapping <- left_join(metadata(multiomics_MAE)$gene_symbol_mapping, genemap)
proteomics_tbl_meta_biomart_chrab <- wideFormat(multiomics_MAE[,,"chrom_abber"]) %>% as_tibble()
proteomics_tbl_meta_biomart_chrab <- mutate_if(proteomics_tbl_meta_biomart_chrab, is.numeric, as.logical)
proteomics_tbl_meta_biomart_chrab <- proteomics_tbl_meta_biomart_chrab %>% dplyr::select(primary,
colnames(proteomics_tbl_meta_biomart_chrab[, -1])[
apply(proteomics_tbl_meta_biomart_chrab[, -1] ,2, function(cl){ all(table(cl)>2) } )
] )
proteomics_tbl_meta_biomart_chrab <- proteomics_tbl_meta_biomart_chrab %>% dplyr::select(primary,
colnames(proteomics_tbl_meta_biomart_chrab[, -1])[
apply(proteomics_tbl_meta_biomart_chrab[, -1] ,2, function(cl){ length(table(cl))>1 } )
] )
proteomics_tbl_meta_biomart_SNP <- wideFormat(multiomics_MAE[,,"SNPs"]) %>% as_tibble()
proteomics_tbl_meta_biomart_SNP <- mutate_if(proteomics_tbl_meta_biomart_SNP, is.numeric, as.logical)
proteomics_tbl_meta_biomart_SNP <- proteomics_tbl_meta_biomart_SNP %>% dplyr::select(primary,
colnames(proteomics_tbl_meta_biomart_SNP[, -1])[
apply(proteomics_tbl_meta_biomart_SNP[, -1] ,2, function(cl){ all(table(cl)>2) } )
] )
proteomics_tbl_meta_biomart_SNP <- proteomics_tbl_meta_biomart_SNP %>% dplyr::select(primary,
colnames(proteomics_tbl_meta_biomart_SNP[, -1])[
apply(proteomics_tbl_meta_biomart_SNP[, -1] ,2, function(cl){ length(table(cl))>1 } )
] )
proteomics_tbl_meta_biomart_health <- wideFormat(multiomics_MAE[,,"health_record_bin"]) %>% as_tibble() %>% dplyr::select(primary, health_record_bin_IGHV_mutated, health_record_bin_elderly_at_diagnosis, health_record_bin_treated)
proteomics_tbl_meta_biomart_health <- mutate_if(proteomics_tbl_meta_biomart_health, is.numeric, as.logical)
proteomics_tbl_meta_biomart <- left_join(
left_join((
longFormat(multiomics_MAE[,,"proteomics"], colDataCols = c("trisomy12", "IGHV_mutated") ) %>%
as_tibble() %>%
#mutate(IGHV= if_else(IGHV %in% c("M", "U"), IGHV, "NA") )
mutate(IGHV=IGHV_mutated)
),
metadata(multiomics_MAE)$gene_symbol_mapping[c(2,3,5:7)],
by=c("rowname"="hgnc_symbol")),
proteomics_tbl_meta_biomart_chrab, by=c("primary"))
proteomics_tbl_meta_biomart <- left_join( left_join(proteomics_tbl_meta_biomart,
proteomics_tbl_meta_biomart_SNP, by=c("primary")),
proteomics_tbl_meta_biomart_health, by=c("primary"))
drug_and_proteomics_prot <- longFormat(multiomics_MAE[,,c("proteomics")]) %>% as_tibble()
drug_and_proteomics_drug <- longFormat(multiomics_MAE[,,c("drug_resp_mono")]) %>% as_tibble()
drug_and_proteomics_drug <- drug_and_proteomics_drug %>% separate(rowname, into = c("Drug", "conc"), sep = "_") %>%
group_by(assay, primary, rowname=Drug, colname ) %>%
dplyr::summarise(value=mean(value, na.rm=TRUE)) %>% ungroup()
drug_and_proteomics <- bind_rows(drug_and_proteomics_prot, drug_and_proteomics_drug)
pats_drug_and_prot <- drug_and_proteomics %>% group_by(primary) %>% dplyr::summarise(nassay=length(unique(assay))) %>% dplyr::filter(nassay==2) %>% .$primary
drug_and_proteomics <- drug_and_proteomics %>% dplyr::filter(primary %in% pats_drug_and_prot)
all_prot <- drug_and_proteomics %>% dplyr::filter(assay=="proteomics", rowname %in% prot_few_nas) %>% .$rowname %>% unique()
all_drugs <- drug_and_proteomics %>% dplyr::filter(assay=="drug_resp_mono") %>% .$rowname %>% unique()
pat_overlap_prot_RNA <- colnames(intersectColumns(multiomics_MAE[,,c("proteomics", "RNAseq_norm")]))[["proteomics"]]
proteomics_tbl_meta_biomart
experiments(multiomics_MAE)
## ExperimentList class object of length 8:
## [1] SNPs: matrix with 8 rows and 80 columns
## [2] drug_resp_mono: data.frame with 130 rows and 81 columns
## [3] chrom_abber: data.frame with 5 rows and 81 columns
## [4] health_record_bin: matrix with 6 rows and 81 columns
## [5] health_record_scaled: matrix with 6 rows and 81 columns
## [6] proteomics: matrix with 7311 rows and 68 columns
## [7] RNAseq_full: DESeqDataSet with 63214 rows and 59 columns
## [8] RNAseq_norm: data.frame with 9273 rows and 59 columns
as_tibble(colData(multiomics_MAE))
multiomics_MAE
## A MultiAssayExperiment object of 8 listed
## experiments with user-defined names and respective classes.
## Containing an ExperimentList class object of length 8:
## [1] SNPs: matrix with 8 rows and 80 columns
## [2] drug_resp_mono: data.frame with 130 rows and 81 columns
## [3] chrom_abber: data.frame with 5 rows and 81 columns
## [4] health_record_bin: matrix with 6 rows and 81 columns
## [5] health_record_scaled: matrix with 6 rows and 81 columns
## [6] proteomics: matrix with 7311 rows and 68 columns
## [7] RNAseq_full: DESeqDataSet with 63214 rows and 59 columns
## [8] RNAseq_norm: data.frame with 9273 rows and 59 columns
## Features:
## experiments() - obtain the ExperimentList instance
## colData() - the primary/phenotype DFrame
## sampleMap() - the sample availability DFrame
## `$`, `[`, `[[` - extract colData columns, subset, or experiment
## *Format() - convert into a long or wide DFrame
## assays() - convert ExperimentList to a SimpleList of matrices
assay(multiomics_MAE[prot_few_nas , ,"proteomics"])[1:5,1:5]
## harmonizing input:
## removing 522 sampleMap rows not in names(experiments)
## removing 13 colData rownames not in sampleMap 'primary'
## CLL_1 CLL_4 CLL_8 CLL_20 CLL_24
## A2M -0.23106599 -0.123775317 -0.20324611 -0.11422224 0.05369119
## AAAS -0.04446032 -0.003929938 -0.10881390 0.21728422 -0.24019198
## AACS 0.18091637 -0.469318906 0.01926965 0.46120353 0.27485862
## AAGAB 0.11577450 0.129642752 0.10785538 -0.06247763 0.25647801
## AAMDC 0.21443028 -0.003820597 -0.09936349 0.62120261 -0.26938714
dim(assay(multiomics_MAE[prot_few_nas , ,"proteomics"]))
## harmonizing input:
## removing 522 sampleMap rows not in names(experiments)
## removing 13 colData rownames not in sampleMap 'primary'
## [1] 7311 68
print(paste( round( (!(assay(multiomics_MAE[,,"proteomics"]) %>% is.na() ) ) %>% colSums() %>% mean) ,
"proteins per sample were detected on average in proteomics data"))
## [1] "7311 proteins per sample were detected on average in proteomics data"
print(paste( round( (!(assay(multiomics_MAE[,,"RNAseq_full"]) %>% is.na() ) ) %>%
colSums() %>% mean) ,
"transcipts per sample were detected on average in RNASeq data"))
## [1] "63214 transcipts per sample were detected on average in RNASeq data"
if(!any(is.na(assay(multiomics_MAE[,,"RNAseq_full"])))){
print("No NAs in RNASeq dataset")
}
## [1] "No NAs in RNASeq dataset"
print( paste( ( (assay(multiomics_MAE[,,"RNAseq_full"]) %>% rowMeans() ) > 0 ) %>% sum , "different transcripts were detected"))
## [1] "40083 different transcripts were detected"
print(
paste(
multiomics_MAE@metadata$gene_symbol_mapping %>%
filter(ensembl_gene_id %in%
names((assay(multiomics_MAE[,,"RNAseq_full"]) %>% rowMeans() ) > 0 ) ) %>%
dplyr::select(hgnc_symbol) %>% unique %>%
filter(hgnc_symbol != "") %>%
nrow,
"transcripts with unique hgnc symbols"))
## [1] "35402 transcripts with unique hgnc symbols"
print( paste(
(multiomics_MAE@metadata$gene_symbol_mapping %>%
filter(ensembl_gene_id %in%
names((assay(multiomics_MAE[,,"RNAseq_full"]) %>% rowMeans() ) > 0 ) ) %>%
dplyr::select(hgnc_symbol) %>% unique %>%
filter(hgnc_symbol != "") %>% .$hgnc_symbol %in%
rownames(multiomics_MAE@ExperimentList$proteomics) ) %>% sum,
"matching proteins and transcripts detected"))
## [1] "7199 matching proteins and transcripts detected"
protpats <- multiomics_MAE@sampleMap %>% as_tibble() %>% filter(assay=="proteomics") %>% .$primary %>% unique
message("Available datasets for proteomics patients:")
multiomics_MAE[,protpats,]@sampleMap %>% .$assay %>% table
## .
## SNPs drug_resp_mono chrom_abber
## 67 68 68
## health_record_bin health_record_scaled proteomics
## 68 68 68
## RNAseq_full RNAseq_norm
## 59 59
order_oncoplot <- longFormat(multiomics_MAE[,,c("SNPs","chrom_abber")]) %>%
as_tibble() %>%
group_by(rowname) %>%
summarise(total=sum(value, na.rm = TRUE)) %>%
arrange(total ) %>%
.$rowname %>% change_chr_abber_brackets
tmp <- wideFormat(multiomics_MAE[,,c("SNPs","chrom_abber")]) %>% as_tibble()
colnames(tmp) <- colnames(tmp) %>% gsub("SNPs_|chrom_abber_", "",.) %>%
change_chr_abber_brackets
for(l in 1:length(order_oncoplot)){
tmp <- tmp %>% arrange_( as.name(order_oncoplot[l] ) )
}
onco_center <-
longFormat(multiomics_MAE[,,c("SNPs","chrom_abber")]) %>%
as_tibble() %>%
mutate(alteration= if_else(value==1, assay, as.character(value))) %>%
mutate(rowname = change_chr_abber_brackets(rowname) ) %>%
ggplot(aes(primary, rowname, fill=alteration)) +
geom_tile(color="black") +
scale_fill_manual(values=c("white", "orange1", "#ca0020", "grey"), labels=c("wt", "CNV", "SNV", "NA" ), na.translate=FALSE) +
scale_y_discrete(limits=order_oncoplot) +
scale_x_discrete(limits=rev(tmp$primary)) +
xlab("Patients") +
theme(panel.background = element_rect(fill= "darkgrey"), panel.grid = element_blank(),
axis.text.x = element_blank(),
#axis.title = element_blank(),
axis.title.y = element_blank(),
axis.ticks.x = element_blank(),
legend.position = "bottom", legend.key.size = unit(10, "pt")) +
guides(fill=guide_legend(title = NULL ))
onco_right <-longFormat(multiomics_MAE[,,c("SNPs","chrom_abber")]) %>%
as_tibble() %>%
mutate(rowname = change_chr_abber_brackets(rowname) ) %>%
ggplot(aes(rowname, value)) + geom_col(aes(fill=assay)) +
scale_x_discrete(limits=order_oncoplot) +
coord_flip()+
scale_fill_manual(values=c("orange1", "#ca0020")) +
theme(panel.background = element_rect(fill= "white"), panel.grid = element_blank(), axis.title = element_blank(),
axis.line.x = element_line(color="black"))
onco_right_perc <- longFormat(multiomics_MAE[,,c("SNPs","chrom_abber")]) %>% as_tibble() %>%
group_by(rowname) %>%
dplyr::summarise(perc=round(sum(value, na.rm = TRUE)/ sum(!is.na(value)) , 2) ) %>%
mutate(rowname = change_chr_abber_brackets(rowname) ) %>%
ggplot(aes(rowname, 1, label=paste0(perc*100, "%") )) +
geom_text() + coord_flip() + theme_void() +
scale_x_discrete(limits=order_oncoplot)
onco_right_total <-
longFormat(multiomics_MAE[,,c("SNPs","chrom_abber")]) %>% as_tibble() %>%
group_by(rowname) %>%
dplyr::summarise(total=sum(value, na.rm = TRUE )) %>%
mutate(rowname = change_chr_abber_brackets(rowname) ) %>%
ggplot(aes(rowname, 1, label=total)) +
geom_text() + coord_flip() + theme_void() +
scale_x_discrete(limits=order_oncoplot)
onco_top <-longFormat(multiomics_MAE[,,c("SNPs","chrom_abber")]) %>%
as_tibble() %>%
mutate(rowname = change_chr_abber_brackets(rowname) ) %>%
ggplot(aes(primary, value)) + geom_col(aes(fill=assay)) +
scale_x_discrete(limits=rev(tmp$primary)) +
scale_fill_manual(values=c("orange1", "#ca0020")) +
theme(panel.background = element_rect(fill= "white"), panel.grid = element_blank(), axis.title = element_blank(),
axis.line.x = element_line(color="black"))
p1 <- insert_yaxis_grob(onco_center,get_panel(onco_right_total) , grid::unit(.2, "null"), position = "right")
p1.2<- insert_yaxis_grob(p1, get_panel(onco_right), grid::unit(.1, "null"), position = "right")
p1.3<- insert_yaxis_grob(p1.2, get_panel(onco_right_perc), grid::unit(.2, "null"), position = "right")
oncoplot <- insert_xaxis_grob(p1.3, onco_top, grid::unit(.2, "null"), position = "top")
ggdraw(oncoplot)
drs_sel <- metadata(multiomics_MAE)$drugs_functional_groups %>%
filter( !( grepl("DMSO", Drug) ) ) %>%
mutate(Pathways=if_else(Pathways=="Apoptosis", "Inducer/inhibitor of apoptosis", Pathways))
order_drugs <- drs_sel$Pathways %>% table() %>% as_tibble() %>% arrange(desc(n)) %>% .$.
drug_nrs <- drs_sel %>%
ggplot(aes(Pathways)) + geom_bar() +
pp_sra_noguides_tilted +
scale_x_discrete(limits=order_drugs)
drug_nrs
save(all_drugs,
BCAA_genes, proteasome_genes, splice_genes, BCR_genes,
drug_and_proteomics, drug_and_proteomics_drug,
multiomics_MAE,
pat_overlap_prot_RNA,
prot_few_nas,
proteomics_tbl_meta_biomart,
proteomics_tbl_meta_biomart_chrab,
proteomics_tbl_meta_biomart_health,
proteomics_tbl_meta_biomart_SNP,
file="data/CLL_Proteomics_Setup.RData")
save(oncoplot,
file = "RData_plots/CLL_Proteomics_Setup_Plots.RData")
sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] grid parallel stats4 stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] cowplot_1.1.1 gplots_3.1.1
## [3] MultiAssayExperiment_1.14.0 biomaRt_2.44.4
## [5] biomartr_0.9.2 apeglm_1.10.0
## [7] limma_3.44.3 forcats_0.5.1
## [9] stringr_1.4.0 dplyr_1.0.3
## [11] purrr_0.3.4 readr_1.4.0
## [13] tidyr_1.1.2 tibble_3.0.6
## [15] tidyverse_1.3.0 DESeq2_1.28.1
## [17] SummarizedExperiment_1.18.2 DelayedArray_0.14.1
## [19] matrixStats_0.58.0 Biobase_2.48.0
## [21] GenomicRanges_1.40.0 GenomeInfoDb_1.24.2
## [23] IRanges_2.22.2 S4Vectors_0.26.1
## [25] BiocGenerics_0.34.0 readxl_1.3.1
## [27] ggpubr_0.4.0 Hmisc_4.4-2
## [29] ggplot2_3.3.3 Formula_1.2-4
## [31] survival_3.2-7 lattice_0.20-41
## [33] Matrix_1.3-2 pheatmap_1.0.12
## [35] gtools_3.8.2 plyr_1.8.6
##
## loaded via a namespace (and not attached):
## [1] backports_1.2.1 BiocFileCache_1.12.1 splines_4.0.2
## [4] BiocParallel_1.22.0 digest_0.6.27 htmltools_0.5.1.1
## [7] magrittr_2.0.1 checkmate_2.0.0 memoise_2.0.0
## [10] cluster_2.1.0 openxlsx_4.2.3 Biostrings_2.56.0
## [13] annotate_1.66.0 modelr_0.1.8 askpass_1.1
## [16] bdsmatrix_1.3-4 prettyunits_1.1.1 jpeg_0.1-8.1
## [19] colorspace_2.0-0 rappdirs_0.3.3 blob_1.2.1
## [22] rvest_0.3.6 haven_2.3.1 xfun_0.20
## [25] crayon_1.4.0 RCurl_1.98-1.2 jsonlite_1.7.2
## [28] genefilter_1.70.0 glue_1.4.2 gtable_0.3.0
## [31] zlibbioc_1.34.0 XVector_0.28.0 car_3.0-10
## [34] abind_1.4-5 scales_1.1.1 mvtnorm_1.1-1
## [37] DBI_1.1.1 rstatix_0.6.0 Rcpp_1.0.6
## [40] xtable_1.8-4 progress_1.2.2 emdbook_1.3.12
## [43] htmlTable_2.1.0 foreign_0.8-81 bit_4.0.4
## [46] htmlwidgets_1.5.3 httr_1.4.2 RColorBrewer_1.1-2
## [49] ellipsis_0.3.1 farver_2.0.3 pkgconfig_2.0.3
## [52] XML_3.99-0.5 nnet_7.3-15 dbplyr_2.0.0
## [55] locfit_1.5-9.4 labeling_0.4.2 tidyselect_1.1.0
## [58] rlang_0.4.10 AnnotationDbi_1.50.3 munsell_0.5.0
## [61] cellranger_1.1.0 tools_4.0.2 cachem_1.0.1
## [64] cli_2.3.0 generics_0.1.0 RSQLite_2.2.3
## [67] broom_0.7.4 evaluate_0.14 fastmap_1.1.0
## [70] yaml_2.2.1 knitr_1.31 bit64_4.0.5
## [73] fs_1.5.0 zip_2.1.1 caTools_1.18.1
## [76] xml2_1.3.2 compiler_4.0.2 rstudioapi_0.13
## [79] curl_4.3 png_0.1-7 ggsignif_0.6.0
## [82] reprex_1.0.0 geneplotter_1.66.0 stringi_1.5.3
## [85] highr_0.8 vctrs_0.3.6 pillar_1.4.7
## [88] lifecycle_0.2.0 data.table_1.13.6 bitops_1.0-6
## [91] R6_2.5.0 latticeExtra_0.6-29 KernSmooth_2.23-18
## [94] gridExtra_2.3 rio_0.5.16 MASS_7.3-53
## [97] assertthat_0.2.1 openssl_1.4.3 withr_2.4.1
## [100] GenomeInfoDbData_1.2.3 hms_1.0.0 rpart_4.1-15
## [103] coda_0.19-4 rmarkdown_2.6 carData_3.0-4
## [106] bbmle_1.0.23.1 numDeriv_2016.8-1.1 lubridate_1.7.9.2
## [109] base64enc_0.1-3